#delimit;
clear;
set more off;
capture log close;

capture erase /data/doed_loan_raw.dta;

!st /data/doed_loan_raw.sas7bdat /data/doed_loan_raw.dta

/*****
CHANGE PATH
*****/;
local in "/data";
local pathtab "/data";

/****************
I'm dropping the loans that were matched based on matching criteria=5
I drop cancelled loans and parent plus loans
*****************/;

use "`in'/doed_loan_raw.dta", clear;

rename tuedt2003 pid;

format dt %td;
format per_beg_dt %td;
format per_end_dt %td;
format loan_stat_dt %td;
format curr_mat_dt %td;
format curr_dis_dt %td;
format curr_can_dt %td;
format out_prin_bal_dt %td;

/*The cases where match_criteria==5 look very bad. I'm going to drop them*/
drop if match_criteria==5;

/*Dropping loans that were cancelled*/
drop if tot_dis==0;

gen loan_type_num=1 if loan_type=="D1";
replace loan_type_num=2 if loan_type=="D2";
replace loan_type_num=3 if loan_type=="D3";
replace loan_type_num=4 if loan_type=="D4";
replace loan_type_num=5 if loan_type=="D5";
replace loan_type_num=6 if loan_type=="D6";
replace loan_type_num=7 if loan_type=="D7";
replace loan_type_num=8 if loan_type=="EU";
replace loan_type_num=9 if loan_type=="FI";
replace loan_type_num=10 if loan_type=="IC";
replace loan_type_num=11 if loan_type=="GB";
replace loan_type_num=12 if loan_type=="NU";
replace loan_type_num=13 if loan_type=="PE";
replace loan_type_num=14 if loan_type=="PK";
replace loan_type_num=15 if loan_type=="PL";
replace loan_type_num=16 if loan_type=="SE";
replace loan_type_num=17 if loan_type=="SF";
replace loan_type_num=18 if loan_type=="SL";
replace loan_type_num=19 if loan_type=="SU";
replace loan_type_num=20 if loan_type=="SG";
replace loan_type_num=21 if loan_type=="CL";
replace loan_type_num=22 if loan_type=="PU";
replace loan_type_num=23 if loan_type=="DU";
replace loan_type_num=24 if loan_type=="RF";

label var loan_type_num "Type of loan (numeric var of loan_type)";
label define loan_type_lab 1 "1=Direct Staf Sub" 2 "2=Direct Staf Unsub" 3 "3=Direct Plus Grad" 4 "4=Direct plus"
	 5 "5=Direct cons Unsub" 6 "6=Direct cons sub" 7 "7=Direct plus cons" 8 "8=Perk Expanded Lending" 9 "9=Federalluy Insured (FISL)"
	 10 "10=ICL" 11 "11=FFEL PLUS Grad" 12 "12=NDSL" 13 "13=Fed Pell Grant" 14 "14=Fed Perkins Loan" 15 "15=FFEL PLUS Loan" 16 "16=FSEOG" 17 "17=FFEL Staf Sub" 18 "18=Supplemental Loan (SLS)"
	 19 "19=FFEL Staf Unsub" 20 "20=Nat'l Sci. & Math Access to retain talent grant" 21 "21=FFEL Cons Loan"  22 "22=Perk Loan"  23 "23=SNat'l Defense Loan"  24 "24=FFEL Refi Loan";
label values loan_type_num loan_type_lab;

/*Consolidated loan*/;
gen cons_loan=(loan_type_num==5 | loan_type_num==6 | loan_type_num==21);
/*Parent Plus-As difference from plus and plus grad*/;
gen plus_par_loan=(loan_type_num==4 | loan_type_num==15);

/*If the loan is a consolidated loan, per_beg_dt and per_end_dt has no meaning, and they almost always take the value 01jan2001. I'll replace them for missing*/
replace per_beg_dt=. if cons_loan==1;
replace per_end_dt=. if cons_loan==1;

/*I will also replace the opeid by missing if the loan is consolidated, since these loans have an opeid equal to 88888888800*/;
replace opeid="" if cons_loan==1;

/*I don't want to keep parents loans*/
drop if plus_par_loan==1;

gen loan_stat_num=1 if loan_stat=="AE";
replace loan_stat_num=2 if loan_stat=="AL";
replace loan_stat_num=3 if loan_stat=="BC";
replace loan_stat_num=4 if loan_stat=="BK";
replace loan_stat_num=5 if loan_stat=="CA";
replace loan_stat_num=6 if loan_stat=="CS";
replace loan_stat_num=7 if loan_stat=="DA";
replace loan_stat_num=8 if loan_stat=="DB";
replace loan_stat_num=9 if loan_stat=="DC";
replace loan_stat_num=10 if loan_stat=="DD";
replace loan_stat_num=11 if loan_stat=="DE";
replace loan_stat_num=12 if loan_stat=="DF";
replace loan_stat_num=13 if loan_stat=="DI";
replace loan_stat_num=14 if loan_stat=="DK";
replace loan_stat_num=15 if loan_stat=="DL";
replace loan_stat_num=16 if loan_stat=="DN";
replace loan_stat_num=17 if loan_stat=="DO";
replace loan_stat_num=18 if loan_stat=="DP";
replace loan_stat_num=19 if loan_stat=="DR";
replace loan_stat_num=20 if loan_stat=="DS";
replace loan_stat_num=21 if loan_stat=="DT";
replace loan_stat_num=22 if loan_stat=="DU";
replace loan_stat_num=23 if loan_stat=="DW";
replace loan_stat_num=24 if loan_stat=="DX";
replace loan_stat_num=25 if loan_stat=="DZ";
replace loan_stat_num=26 if loan_stat=="FB";
replace loan_stat_num=27 if loan_stat=="FC";
replace loan_stat_num=28 if loan_stat=="FR";
replace loan_stat_num=29 if loan_stat=="IA";
replace loan_stat_num=30 if loan_stat=="ID";
replace loan_stat_num=31 if loan_stat=="IG";
replace loan_stat_num=32 if loan_stat=="IM";
replace loan_stat_num=33 if loan_stat=="IP";
replace loan_stat_num=34 if loan_stat=="OD";
replace loan_stat_num=35 if loan_stat=="PF";
replace loan_stat_num=36 if loan_stat=="PM";
replace loan_stat_num=37 if loan_stat=="PN";
replace loan_stat_num=38 if loan_stat=="RF";
replace loan_stat_num=39 if loan_stat=="RH";
replace loan_stat_num=40 if loan_stat=="RP";
replace loan_stat_num=41 if loan_stat=="UA";
replace loan_stat_num=42 if loan_stat=="UB";
replace loan_stat_num=43 if loan_stat=="UC";
replace loan_stat_num=44 if loan_stat=="UD";
replace loan_stat_num=45 if loan_stat=="UI";
replace loan_stat_num=46 if loan_stat=="XC";

label var loan_stat_num "Loan status as of loan_stat_dt (numeric var of loan_stat)";

label define loan_stat_lab 1 "1=Loan transferred to new holder" 2 "2=Abandoned Loan" 3 "3=Bankruptcy claim, discharged" 4 "4=Bankruptcy claim, active"
	5 "5=Canceled" 6 "6=Closed school discharge" 7 "7=Deferred" 8 "8=Defaulted then bankrupt, active chapter 13" 9 "9=Defaulted, compromise"
	10 "10=Defaulted, then died" 11 "11=Death" 12 "12=Defaulted, unresolved" 13 "13=Disability" 14 "14=Defaulted, then bankrupt, discharged chapter 13"
	15 "15=Defaulted, in litigation" 16 "16=Defaulted, then paid in ful by consolidation" 17 "17=Defaulted, then bankrupt, active, other" 
	18 "18=Defautled, paid in full" 19 "19=Defaulted loan included in a roll-up loan" 20 "20=Defaulted, then disabled" 21 "21=Defaulted, collection terminated"
	22 "22=Defaulted, unresolved" 23 "23=Defaulted, write off" 24 "24=Defaulted, six consecutive payments" 25 "25=Defaulted, six consecutive payments, then missed payment"
	26 "26=Forbearance" 27 "27=False certification discharge" 28 "28=Fraud" 29 "29=Loan originated" 30 "30=In school or grace period" 31 "31=In grace period"
	32 "32=In military grace" 33 "33=In post - deferment grace" 34 "34=Defaulted, then bankrupt, discharged, other" 35 "35=Paid in full" 36 "36=Presumed paid-in-full"
	37 "37=Paid in full through consolidation loan" 38 "38=Refinanced" 39 "39=Loan transferred by DCS to Sallie Mae" 40 "40=In repayment"
	41 "41=Temporarily uninsured - loan not in default" 42 "42=Temporarily uninsured - loan default" 43 "43=Permanently uninsured/Unreinsured - loan not in default"
	44 "44=Premenantly uninsured/Unreinsured - loan in default" 45 "45=Uninsured/Unreinsured" 46 "46=Defaulted, six consecutive payments";
label values loan_stat_num loan_stat_lab;


label var dt "Date the loan was originated";
label var per_beg_dt "Beginning of Period for the loan (indicates beginning of enrollment, I think)";
label var per_end_dt "Ending of Period for the loan (indicates ending of enrollment, I think)";
label var loan_stat_dt "Date reported status became effective";
label var curr_mat_dt "Day when repayment starts";
label var int_rt "Interest rate";
label var int_rt_code "Whether rate is fixed or flexible";
label var loan_grty "Amount agreed to be disbursed";
label var tot_dis "Amount actually disbursed";
label var curr_dis_dt "Date when the amount was disbursed";
label var tot_can "Amount cancelled (not disbursed)";
label var curr_can_dt "date when the loan was cancelled";
label var out_prin_bal "Outstanding pricipal balance by out_prin_bal_dt (incl capitalized interest)";
label var out_prin_bal_dt "Date corresponding to out_prin_bal";
label var loan_seq_no "Number of loan";

gen closed_loan=(loan_stat_num==2 | loan_stat_num==3 | loan_stat_num==5 | loan_stat_num==6
	 | loan_stat_num==10 | loan_stat_num==11 | loan_stat_num==13 | loan_stat_num==14
	 | loan_stat_num==16 | loan_stat_num==18 | loan_stat_num==20 | loan_stat_num==23
	 | loan_stat_num==34 | loan_stat_num==35 | loan_stat_num==37);

gen closed_loan_dt=loan_stat_dt if closed_loan==1;	
format closed_loan_dt %td;

/***Keeping the date of the first time the person defaulted**********/
gen defaulted=inlist(loan_stat_num,2,3,4,8,9,10,12,14,15,16,17,18,19,20,21,22,23,24,25,34,42,44,46);
gen defaulted_dt_t=loan_stat_dt if defaulted==1;
bys pid: egen defaulted_dt=min(defaulted_dt_t);
format defaulted_dt %td;
drop defaulted_dt_t;


merge m:1 pid using "`pathtab'/age_tomerge.dta";
keep if _merge==3;
drop _m;


drop if ((dt-dob)/365<15); /*Dropping loans that were opened before the person turned 15y.o.*/
drop if ((per_beg_dt-dob)/365<15); /*This drops 63 loans. They all have per_beg_dt=Jan1900 and the loan type is PU. They are clearly wrong*/

keep pid loan_seq_no loan_type dt closed_loan closed_loan_dt defaulted_dt;

bys pid loan_seq_no loan_type dt: egen closed_dt=max(closed_loan_dt);
format closed_dt %td;
drop closed_loan_dt;
drop closed_loan;
bys pid loan_seq_no loan_type dt: keep if _n==1;

replace closed_dt=mdy(1,1,2020) if closed_dt==.;
drop loan_type loan_seq_no;
egen group=group(pid dt);

gsort group -closed_dt;
by group: keep if _n==1;
drop group;

gen counter=1;
local i=1;
while `i'~=0 {;
capture drop maxsumtoupdate;
sort pid dt closed_dt;
by pid: egen sumcounter=sum(counter);

gen todrop=1 if pid==pid[_n-1] & dt<=closed_dt[_n-1];
gen toupdate=1 if pid==pid[_n+1] & closed_dt>=dt[_n+1] & todrop==.;

gen sumtoupdate=sum(toupdate);
replace sumtoupdate=0 if toupdate==. & todrop==.;

bys sumtoupdate: egen maxclosed_dt=max(closed_dt);
replace maxclosed_dt=. if sumtoupdate==0;
sort pid dt closed_dt;

gen closed_dt_new=closed_dt;
replace closed_dt_new=maxclosed_dt if maxclosed_dt~=.;

tab todrop;
drop if todrop==1;  /*47,015*/ /*199*/ /*3*/ /*2*/ /*1*/ 

egen maxsumtoupdate=max(sumtoupdate);
local i=maxsumtoupdate;

drop todrop toupdate sumtoupdate maxclosed_dt closed_dt sumcounter;
rename closed_dt_new closed_dt;
};
drop maxsumtoupdate;

by pid: gen open=sum(counter);
sum open;
local m=r(max);
forvalues i=1(1)`m' {;
gen dtopen`i'_t=dt if open==`i';
gen dtclo`i'_t=closed_dt  if open==`i';
replace dtclo`i'=mdy(2,1,2008) if dtclo`i'==mdy(1,1,2020);
bys pid: egen start`i'=max(dtopen`i'_t);
bys pid: egen end`i'=max(dtclo`i'_t);
};
sort pid;
keep pid star* end* defaulted_dt;
rename start* nslds_start*;
rename end* nslds_end*;
format nslds_* %td;
bys pid: keep if _n==1;


save "`pathtab'/open_close_sl_accounts_nslds.dta", replace;

